{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Putting it all together with Pipelines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 5.1,  3.5,  1.4,  0.2],\n",
       "       [ 4.9,  3. ,  nan,  0.2],\n",
       "       [ 4.7,  3.2,  1.3,  0.2],\n",
       "       [ 4.6,  3.1,  nan,  0.2],\n",
       "       [ 5. ,  3.6,  1.4,  0.2]])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "from sklearn.datasets import load_iris\n",
    "import numpy as np\n",
    "iris = load_iris()\n",
    "iris_data = iris.data\n",
    "mask = np.random.binomial(1, .25, iris_data.shape).astype(bool)\n",
    "iris_data[mask] = np.nan\n",
    "iris_data[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-2.60698767, -0.08712965,  0.24363406,  0.22537488],\n",
       "       [-0.62326808,  1.07071559,  0.59357313,  0.00601078],\n",
       "       [-2.80094824,  0.27826148,  0.01939741,  0.02894031],\n",
       "       [-0.72165678,  1.30496332,  0.51085746,  0.17514322],\n",
       "       [-2.64378029, -0.02318272,  0.23004317,  0.34525975]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import pipeline, preprocessing, decomposition\n",
    "\n",
    "\n",
    "pca = decomposition.PCA()\n",
    "imputer = preprocessing.Imputer()\n",
    "\n",
    "\n",
    "pipe = pipeline.Pipeline([('imputer', imputer), ('pca', pca)])\n",
    "iris_data_transformed = pipe.fit_transform(iris_data)\n",
    "iris_data_transformed[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('imputer',\n",
       "  Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)),\n",
       " ('pca',\n",
       "  PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,\n",
       "    svd_solver='auto', tol=0.0, whiten=False))]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe2 = pipeline.make_pipeline(imputer, pca)\n",
    "pipe2.steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-2.60698767, -0.08712965,  0.24363406,  0.22537488],\n",
       "       [-0.62326808,  1.07071559,  0.59357313,  0.00601078],\n",
       "       [-2.80094824,  0.27826148,  0.01939741,  0.02894031],\n",
       "       [-0.72165678,  1.30496332,  0.51085746,  0.17514322],\n",
       "       [-2.64378029, -0.02318272,  0.23004317,  0.34525975]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_data_transformed2 = pipe2.fit_transform(iris_data)\n",
    "iris_data_transformed2[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pipeline(memory=None,\n",
       "     steps=[('imputer', Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)), ('pca', PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,\n",
       "  svd_solver='auto', tol=0.0, whiten=False))])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe2.set_params(pca__n_components=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-2.60698767, -0.08712965],\n",
       "       [-0.62326808,  1.07071559],\n",
       "       [-2.80094824,  0.27826148],\n",
       "       [-0.72165678,  1.30496332],\n",
       "       [-2.64378029, -0.02318272]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_data_transformed3 = pipe2.fit_transform(iris_data)\n",
    "iris_data_transformed3[:5]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
